package com.dc.project.util.reader;

import com.dc.project.util.reader.base.BaseReaderUtil;
import com.dc.project.util.reader.base.FileReaderModel;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Locale;

/**
 * Reader that extracts the plain text of Microsoft Word documents.
 *
 * <p>The format is chosen from the file name's extension, compared case-insensitively:
 * {@code .docx} is parsed with {@link XWPFDocument} and {@code .doc} with {@link HWPFDocument}.
 * Any other extension, and any file that cannot be opened or parsed, yields
 * {@link FileReaderModel#EMPTY}.
 *
 * @author chenxueli
 * @date 2025-03-10 15:16:00
 */
public class WordReaderUtil implements BaseReaderUtil {

    /** Extension of OOXML Word documents. */
    private static final String DOCX_SUFFIX = ".docx";

    /** Extension of legacy binary Word documents. */
    private static final String DOC_SUFFIX = ".doc";

    /**
     * Reads the text content of a Word file.
     *
     * @param file the file to read; its name's extension selects the parser
     * @return a model wrapping the extracted text, or {@link FileReaderModel#EMPTY} when the
     *         extension is neither {@code .doc} nor {@code .docx}, or when the file cannot be
     *         opened or parsed
     * @throws NullPointerException if {@code file} is {@code null}
     */
    @Override
    public FileReaderModel read(File file) {
        // Decide on the extension first so that unsupported files are never opened.
        // Locale.ROOT keeps the comparison independent of the JVM's default locale.
        var fileName = file.getName().toLowerCase(Locale.ROOT);
        var docx = fileName.endsWith(DOCX_SUFFIX);
        if (!docx && !fileName.endsWith(DOC_SUFFIX)) {
            return FileReaderModel.EMPTY;
        }

        String text;
        try {
            text = extractText(file, docx);
        } catch (IOException | RuntimeException ignored) {
            // Best-effort contract: an unreadable file produces EMPTY rather than an error.
            // RuntimeException is included because Apache POI reports many malformed-input
            // conditions (content not matching the extension, encrypted or corrupt documents)
            // with unchecked exceptions, which would otherwise escape to the caller.
            return FileReaderModel.EMPTY;
        }
        return FileReaderModel.newInstance(text);
    }

    /**
     * Opens the file and extracts its text with the parser matching the given format.
     *
     * @param file the Word file to open
     * @param docx {@code true} to parse as {@code .docx}, {@code false} to parse as {@code .doc}
     * @return the text returned by the POI extractor
     * @throws IOException if the file cannot be opened or read
     */
    private static String extractText(File file, boolean docx) throws IOException {
        try (var fis = new FileInputStream(file)) {
            if (docx) {
                try (var document = new XWPFDocument(fis)) {
                    return new XWPFWordExtractor(document).getText();
                }
            }
            try (var document = new HWPFDocument(fis)) {
                return new WordExtractor(document).getText();
            }
        }
    }

}
